Storage Classes Comparison
Google Cloud Storage Architecture
Location Types
| Type | Example | Availability | Use Case |
|---|---|---|---|
| Multi-region | US, EU, ASIA | 99.95% | Global apps, HA + DR |
| Dual-region | US-EAST1 + US-WEST1 | 99.95% | Regional HA, compliance |
| Region | us-central1 | 99.9% | Lowest latency, data residency |
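The location type is fixed when the bucket is created. Below is a minimal sketch of creating one bucket per location type, using the client initialized in the next section; the bucket names are placeholders, and the data_locations argument for custom dual-regions requires a recent google-cloud-storage release.
# Sketch: one bucket per location type (placeholder bucket names)
regional_bucket = client.create_bucket('demo-regional-bucket', location='us-central1')
multi_region_bucket = client.create_bucket('demo-mr-bucket', location='EU')
# Custom dual-region (data_locations needs a recent google-cloud-storage version)
dual_region_bucket = client.create_bucket(
    'demo-dr-bucket',
    location='US',
    data_locations=['US-EAST1', 'US-WEST1']
)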
Python SDK: Basic Operations
from google.cloud import storage
from google.oauth2 import service_account
# Initialize client
credentials = service_account.Credentials.from_service_account_file(
    'service-account.json'
)
client = storage.Client(credentials=credentials, project='my-project')
# CREATE bucket
bucket_name = 'my-unique-bucket-123'
bucket = client.create_bucket(
    bucket_name,
    location='US',  # Multi-region
    storage_class='STANDARD'
)
print(f"Created bucket: {bucket.name}")
# Set uniform bucket-level access (recommended)
bucket.iam_configuration.uniform_bucket_level_access_enabled = True
bucket.patch()
# UPLOAD file
blob = bucket.blob('data/sales.csv')
blob.upload_from_filename('local-sales.csv')
print(f"Uploaded: {blob.name}")
# Set metadata
blob.metadata = {'source': 'crm', 'department': 'sales'}
blob.patch()
# UPLOAD with resumable upload (setting chunk_size forces a resumable upload; useful for large files)
blob = bucket.blob('backups/database.sql.gz')
blob.chunk_size = 5 * 1024 * 1024  # 5 MB chunks (must be a multiple of 256 KB)
blob.upload_from_filename('database.sql.gz')
# DOWNLOAD file
blob = bucket.blob('data/sales.csv')
blob.download_to_filename('downloaded-sales.csv')
print(f"Downloaded: {blob.name}")
# LIST objects
blobs = client.list_blobs(bucket_name, prefix='data/')
for blob in blobs:
    print(f"{blob.name} - {blob.size} bytes - {blob.updated}")
# DELETE object
blob = bucket.blob('data/old-file.txt')
blob.delete()
# DELETE bucket (must be empty)
bucket.delete()
Object Versioning
# Enable versioning
bucket.versioning_enabled = True
bucket.patch()
# Upload new version
blob = bucket.blob('config.yaml')
blob.upload_from_string('version: 2.0')
print(f"Generation: {blob.generation}")
# List all versions
blobs = bucket.list_blobs(prefix='config.yaml', versions=True)
for blob in blobs:
    print(f"Generation {blob.generation}: {blob.updated}")
# Get specific version
blob = bucket.blob('config.yaml', generation=12345)
content = blob.download_as_text()
# Restore old version
old_blob = bucket.blob('config.yaml', generation=12345)
new_blob = bucket.blob('config.yaml')
new_blob.rewrite(old_blob)
Lifecycle Management
# Transition objects to Nearline 30 days after upload
bucket.add_lifecycle_set_storage_class_rule(
    'NEARLINE',
    age=30,                   # Days since upload
    matches_prefix=['logs/']  # Only objects under logs/ (requires a recent client version)
)
# Delete old objects after 1 year
bucket.add_lifecycle_delete_rule(
    age=365,                  # 1 year
    matches_prefix=['temp/']
)
# Save lifecycle rules
bucket.patch()
# Enable Autoclass (automatic storage class transitions)
bucket.autoclass_enabled = True
bucket.patch()
print(f"Autoclass toggled at: {bucket.autoclass_toggle_time}")  # set by the server
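To verify what was saved, the configured rules can be read back from the bucket:
# Read back the configured lifecycle rules
bucket.reload()
for rule in bucket.lifecycle_rules:
    print(rule)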
Signed URLs & Notifications
# Generate signed URL (temporary public access)
from datetime import timedelta
blob = bucket.blob('private/report.pdf')
url = blob.generate_signed_url(
    version='v4',
    expiration=timedelta(hours=1),
    method='GET'
)
print(f"Signed URL: {url}")
# Upload with signed URL
upload_url = blob.generate_signed_url(
    version='v4',
    expiration=timedelta(minutes=15),
    method='PUT',
    content_type='application/pdf'
)
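A client that has no Google credentials can then upload directly to that URL over plain HTTP; a sketch using the third-party requests package (the local file name is a placeholder):
# Sketch: upload through the signed URL (no GCP credentials needed)
import requests
with open('report.pdf', 'rb') as f:
    response = requests.put(
        upload_url,
        data=f,
        headers={'Content-Type': 'application/pdf'}  # must match content_type above
    )
print(response.status_code)  # 200 on success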
# Pub/Sub notifications (the topic must already exist and the GCS service agent
# needs roles/pubsub.publisher on it)
notification = bucket.notification(
    topic_name='gcs-events',
    topic_project='my-project',
    event_types=['OBJECT_FINALIZE'],  # Object created
    blob_name_prefix='uploads/',
    payload_format='JSON_API_V1'
)
notification.create()
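The notification configs attached to the bucket can be listed to confirm the setup:
# Verify the notification configuration
for config in bucket.list_notifications():
    print(config.notification_id, config.topic_name, config.event_types)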
IAM & Encryption
# IAM - Grant access
policy = bucket.get_iam_policy(requested_policy_version=3)
policy.version = 3  # keep version 3 when writing the policy back
# Add role binding
policy.bindings.append({
    'role': 'roles/storage.objectViewer',
    'members': {'user:analyst@example.com'}
})
bucket.set_iam_policy(policy)
# Check permissions
permissions = bucket.test_iam_permissions([
    'storage.objects.get',
    'storage.objects.list'
])
print(f"Has permissions: {permissions}")
# Customer-managed encryption (CMEK) - the key must already exist in Cloud KMS
kms_key_name = 'projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key'
blob = bucket.blob('sensitive-data.csv', kms_key_name=kms_key_name)
blob.upload_from_filename('data.csv')
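Instead of naming the key on every object, a default CMEK key can be set on the bucket so new objects use it automatically; a sketch assuming the same key:
# Set a bucket-wide default CMEK key for newly written objects
bucket.default_kms_key_name = kms_key_name
bucket.patch()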
Best Practices
- Use uniform bucket-level access: Simpler security, easier auditing
- Enable Autoclass: Automatic cost optimization based on access patterns
- Implement lifecycle rules: Auto-delete old objects, transition storage classes
- Use signed URLs: Temporary access without exposing credentials
- Enable versioning: Protect against accidental deletion/overwrites
- Choose the right location: Multi-region for HA, single region for lowest latency
- Parallel composite uploads: For files > 100 MB
- Monitor with Cloud Logging: Track access patterns, errors
- CMEK for sensitive data: Customer-managed encryption keys
- Use requester pays: For public datasets, shift egress costs to users (see the sketch below)
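A minimal sketch of the requester-pays item above: enable it on the bucket, then access the data with an explicit billing project (project and bucket names are placeholders).
# Sketch: enable requester pays, then read with an explicit billing project
bucket.requester_pays = True
bucket.patch()
paying_bucket = client.bucket('my-unique-bucket-123', user_project='reader-billing-project')
paying_bucket.blob('data/sales.csv').download_to_filename('sales.csv')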
Cost Optimization
- Autoclass: Saves up to 80% on infrequently accessed data
- Archive storage: $0.0012/GB/month for cold data (vs $0.020/GB Standard)
- Avoid early deletion: Nearline 30d, Coldline 90d, Archive 365d minimums
- Minimize egress: Use CDN, same-region processing, batch downloads
- Lifecycle deletion: Auto-delete temp files, old logs
- Compress data: gzip before upload (30-70% savings; see the sketch below)
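A sketch of the compression item above: gzip the payload locally and record the encoding so clients can still read it transparently (file names are placeholders).
# Sketch: gzip before upload and record the content encoding
import gzip
with open('big-log.json', 'rb') as src:
    compressed = gzip.compress(src.read())
blob = bucket.blob('logs/big-log.json')
blob.content_encoding = 'gzip'
blob.upload_from_string(compressed, content_type='application/json')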