Skip to content

Commit 60e1946

Browse files
committed
AWS Aurora autoscaler README & tiny text corrections
Signed-off-by: Dmitry Shurupov <[email protected]>
1 parent e413ef6 commit 60e1946

File tree

3 files changed

+87
-73
lines changed

3 files changed

+87
-73
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
Lambda functions written in Python that perform vertical autoscaling
2+
of AWS Aurora databases:
3+
4+
1. _Alarm_ (`rds_vscale_alarm_lambda.py`) is a lambda function triggered
5+
when the CPU Load Average value exceeds a threshold. It finds a suitable
6+
small instance within an RDS cluster and scales it up to
7+
the next possible instance type.
8+
2. _Event_ (`rds_vscale_event_lambda.py`) is a lambda function triggered
9+
when the instance modification initiated by _Alarm_ is completed. It
10+
scales the remaining smallest RDS instances, bringing them to the same size.
11+
12+
This code is used (and better described) in the following article:
13+
* [“Implementing vertical autoscaling for Aurora databases using Lambda functions in AWS”](https://blog.palark.com/aws-rds-aurora-vertical-autoscaling/)
14+
(published in April 2024)

2024/aws-lambda-db-vertical-autoscaler/rds_vscale_alarm_lambda.py

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def send_sns_alert(message):
3131

3232
def get_instance_details(client, instance_identifier):
3333
"""
34-
get instance details
34+
get the instance details
3535
"""
3636
try:
3737
response = client.describe_db_instances(DBInstanceIdentifier=instance_identifier)
@@ -40,21 +40,21 @@ def get_instance_details(client, instance_identifier):
4040
cluster_identifier = instance_info.get('DBClusterIdentifier', None)
4141
return instance_class, cluster_identifier
4242
except ClientError as e:
43-
error_message = f"Error getting DB instance details: {e}"
43+
error_message = f"Error getting the DB instance details: {e}"
4444
print(error_message)
4545
send_sns_alert(error_message)
4646
return None, None
4747

4848
def get_cluster_version(client, cluster_identifier):
4949
"""
50-
get cluster version
50+
get the cluster version
5151
"""
5252
try:
5353
response = client.describe_db_clusters(DBClusterIdentifier=cluster_identifier)
5454
cluster_info = response['DBClusters'][0]
5555
return cluster_info['EngineVersion']
5656
except ClientError as e:
57-
error_message = f"Error getting DB cluster version: {e}"
57+
error_message = f"Error getting the DB cluster version: {e}"
5858
print(error_message)
5959
send_sns_alert(error_message)
6060
return None
@@ -70,7 +70,7 @@ def instance_type_sorter(instance_type):
7070

7171
def change_instance_type(client, instance_identifier, new_instance_type):
7272
"""
73-
change instance type
73+
change the instance type
7474
"""
7575
try:
7676
response = client.modify_db_instance(
@@ -80,32 +80,32 @@ def change_instance_type(client, instance_identifier, new_instance_type):
8080
)
8181
return response, None
8282
except ClientError as e:
83-
error_message = f"An error during the attempt to vertically scale the RDS instance {e}"
83+
error_message = f"Error during an attempt to vertically scale the RDS instance {e}"
8484
print(error_message)
8585
send_sns_alert(error_message)
8686
return None, str(e)
8787

8888
def get_instance_arn(client, instance_identifier):
8989
"""
90-
get instance arn
90+
get the instance arn
9191
"""
9292
try:
9393
instance_info = client.describe_db_instances(DBInstanceIdentifier=instance_identifier)
9494
return instance_info['DBInstances'][0]['DBInstanceArn']
9595
except ClientError as e:
96-
error_message = f"Error getting instance ARN for {instance_identifier}: {e}"
96+
error_message = f"Error getting the instance ARN for {instance_identifier}: {e}"
9797
print(error_message)
9898
send_sns_alert(error_message)
9999
return None
100100

101101

102102
def add_modifying_tag(client, instance_identifier):
103103
"""
104-
add modifying tag and timestamp to prevent a few actions in one period
104+
add the modifying tag and timestamp to prevent simultaneous actions at the same time
105105
"""
106106
instance_arn = get_instance_arn(client, instance_identifier)
107107
if not instance_arn:
108-
print(f"ARN not found for instance {instance_identifier}")
108+
print(f"ARN not found for the instance {instance_identifier}")
109109
return
110110
timestamp = datetime.now(timezone.utc).isoformat()
111111
try:
@@ -115,15 +115,15 @@ def add_modifying_tag(client, instance_identifier):
115115
{'Key': 'modificationTimestamp', 'Value': timestamp}
116116
]
117117
)
118-
print(f"Added 'modifying' tag to instance {instance_identifier}")
118+
print(f"Added the 'modifying' tag to instance {instance_identifier}")
119119
except ClientError as e:
120-
error_message = f"Error adding 'modifying' tag to {instance_identifier}: {e}"
120+
error_message = f"Error adding the 'modifying' tag to {instance_identifier}: {e}"
121121
print(error_message)
122122
send_sns_alert(error_message)
123123

124124
def any_instance_has_modifying_tag(client, cluster_instances):
125125
"""
126-
search if modifying tag exists
126+
check if the modifying tag exists
127127
"""
128128
for member in cluster_instances:
129129
instance_arn = get_instance_arn(client, member['DBInstanceIdentifier'])
@@ -169,7 +169,7 @@ def modification_timestamps(client, cluster_instances, cooldown_period):
169169

170170
def any_member_modifying(client, cluster_instances):
171171
"""
172-
checking if any cluster instance already modifing
172+
checking if any cluster instance is being modified already
173173
"""
174174
for member in cluster_instances:
175175
instance_info = client.describe_db_instances(DBInstanceIdentifier=member['DBInstanceIdentifier'])
@@ -193,17 +193,17 @@ def lambda_handler(event, _):
193193
break
194194

195195
if db_instance_identifier is None:
196-
raise ValueError("DBInstanceIdentifier not found in CloudWatch Alarm event")
196+
raise ValueError("DBInstanceIdentifier not found in the CloudWatch Alarm event")
197197

198198
_, cluster_identifier = get_instance_details(rds_client, db_instance_identifier)
199199
if not cluster_identifier:
200-
raise ValueError("Instance is not part of any RDS cluster")
200+
raise ValueError("Instance is not a part of any RDS cluster")
201201

202202
cluster_response = rds_client.describe_db_clusters(DBClusterIdentifier=cluster_identifier)
203203
cluster_instances = cluster_response['DBClusters'][0]['DBClusterMembers']
204204

205205
if any_member_modifying(rds_client, cluster_instances):
206-
print("At least one instance in the cluster is currently modifying.")
206+
print("At least one instance in the cluster is currently being modified.")
207207
return
208208

209209
writer_instance_identifier, writer_instance_type = None, None
@@ -228,7 +228,7 @@ def lambda_handler(event, _):
228228

229229
# Ensure writer_instance_type is defined before comparing
230230
if writer_instance_type is None:
231-
raise ValueError("Writer instance type not found in the cluster")
231+
raise ValueError("The writer instance type not found in the cluster")
232232

233233

234234
for member in cluster_instances:
@@ -237,41 +237,41 @@ def lambda_handler(event, _):
237237
if instance_type_sorter(member_instance_type) <= instance_type_sorter(writer_instance_type):
238238
is_writer_smallest = False
239239

240-
# Check if any instance is modifying or has modifying tag
240+
# Check if any instance is being modified or has the modifying tag
241241
if any_instance_has_modifying_tag(rds_client, cluster_instances):
242-
print("An instance in the cluster has 'modifying' tag.")
242+
print("An instance in the cluster has the 'modifying' tag.")
243243
return
244244

245245
cooldown_not_expired = modification_timestamps(rds_client, cluster_instances, MODIFY_COOLDOWN_PERIOD)
246246
if cooldown_not_expired:
247-
message = "An attempt was made to vertically scale the RDS instance in cluster, but Cooldown period has not expired for at least one instance in the cluster."
247+
message = "We tried to vertically scale the RDS instance in the cluster. However, the Cooldown period has not expired for at least one instance in the cluster."
248248
print(message)
249249
print(send_sns_alert)
250250
return
251251

252252
if is_writer_smallest:
253-
# Scaling up writer
253+
# Scaling up the writer
254254
new_writer_instance_type = SIZE_ORDER[writer_size_index + 1]
255-
print(f"Selected new instance type for writer: {new_writer_instance_type}")
255+
print(f"Selected new instance type for the writer: {new_writer_instance_type}")
256256
if new_writer_instance_type != writer_instance_type:
257-
print(f"Attempting to change instance type for {writer_instance_identifier} to {new_writer_instance_type}")
257+
print(f"Attempting to change the instance type for {writer_instance_identifier} to {new_writer_instance_type}")
258258
_, error = change_instance_type(rds_client, writer_instance_identifier, new_writer_instance_type)
259259
if not error:
260-
message = f"Changed writer instance type to {new_writer_instance_type}"
260+
message = f"Changed the writer instance type to {new_writer_instance_type}"
261261
print(message)
262262
send_sns_alert(message)
263263
add_modifying_tag(rds_client, writer_instance_identifier)
264264
else:
265-
error_message = f"Failed to change writer instance type. Error: {error}"
265+
error_message = f"Failed to change the writer instance type. Error: {error}"
266266
print(error_message)
267267
send_sns_alert(error_message)
268268
else:
269-
error_message = "Writer instance is already at the maximum size, scaling is not possible"
269+
error_message = "The writer instance is at the maximum size already; scaling is not possible"
270270
print(error_message)
271271
send_sns_alert(error_message)
272272
continue
273273

274-
# Process reader instances
274+
# Process the reader instances
275275
smallest_size = None
276276
min_size_index = float('inf')
277277
eligible_readers = []
@@ -290,28 +290,28 @@ def lambda_handler(event, _):
290290
reader_to_scale = random.choice(eligible_readers)
291291
new_reader_instance_type = SIZE_ORDER[min_size_index + 1]
292292
if new_reader_instance_type != smallest_size:
293-
print(f"Attempting to change instance type for {reader_to_scale} to {new_reader_instance_type}")
293+
print(f"Attempting to change the instance type for {reader_to_scale} to {new_reader_instance_type}")
294294
_, error = change_instance_type(rds_client, reader_to_scale, new_reader_instance_type)
295295
if not error:
296-
message = f"Changed reader instance type to {new_reader_instance_type}"
296+
message = f"Changed the reader instance type to {new_reader_instance_type}"
297297
print(message)
298298
send_sns_alert(message)
299299
add_modifying_tag(rds_client, reader_to_scale)
300300
else:
301-
error_message = f"Failed to change reader instance type. Error: {error}"
301+
error_message = f"Failed to change the reader instance type. Error: {error}"
302302
print(error_message)
303303
send_sns_alert(error_message)
304304
else:
305-
error_message = "Reader instance is already at the maximum size, scaling is not possible"
305+
error_message = "The reader instance is at the maximum size already; scaling is not possible"
306306
print(error_message)
307307
send_sns_alert(error_message)
308308
else:
309309
print("No eligible readers to scale up.")
310-
send_sns_alert("An attempt was made to vertically scale the RDS instance, but the conditions were not suitable.")
310+
send_sns_alert("We tried to vertically scale the RDS instance. However, the required conditions were not met.")
311311

312312
return {
313313
'statusCode': 200,
314-
'body': json.dumps("Processed instances in cluster.")
314+
'body': json.dumps("Processed instances in the cluster.")
315315
}
316316
except ClientError as e:
317317
error_message = f"Failed to execute the function. Error: {str(e)}"

0 commit comments

Comments
 (0)