# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
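# Trains a small CNN to predict gender from face images and saves the trained
# weights plus the label and protected-attribute arrays needed for a
# downstream fairness check.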
import glob
import os
import argparse

import numpy as np
from PIL import Image

import torch
import torch.utils.data
import torch.nn as nn
from torchsummary import summary

np.random.seed(99)
torch.manual_seed(99)

class ThreeLayerCNN(torch.nn.Module):
    """
    Input: 64x64 face image (eye aligned).
    Output: 1-D tensor with 2 elements. Used for binary classification.
    Parameters:
        Number of conv layers: 3
        Number of fully connected layers: 2
    """
    def __init__(self):
        super(ThreeLayerCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 6, 5)
        self.pool = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(6, 16, 5)
        self.conv3 = torch.nn.Conv2d(16, 16, 6)
        self.fc1 = torch.nn.Linear(16 * 4 * 4, 120)
        self.fc2 = torch.nn.Linear(120, 2)

    def forward(self, x):
        # 64x64 -> conv1 -> 60x60 -> pool -> 30x30
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        # 30x30 -> conv2 -> 26x26 -> pool -> 13x13
        x = self.pool(torch.nn.functional.relu(self.conv2(x)))
        # 13x13 -> conv3 -> 8x8 -> pool -> 4x4
        x = self.pool(torch.nn.functional.relu(self.conv3(x)))
        # Flatten to (batch, 16*4*4) before the fully connected layers.
        x = x.view(-1, 16 * 4 * 4)
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, help='Dataset directory path')
    parser.add_argument('--result_path', type=str, help='Result model path')
    parser.add_argument('--label_dir', type=str, help='Label directory path')
    args = parser.parse_args()

    image_dir = args.data_dir
    result_dir = args.result_path
    label_dir = args.label_dir

    """ Load and Process Images """

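    # Only the race codes listed below are kept; they define the privileged and
    # unprivileged groups and the favorable/unfavorable labels used by the
    # fairness check that consumes the arrays saved at the end of this script.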
    races_to_consider = [0, 4]
    unprivileged_groups = [{'race': 4.0}]
    privileged_groups = [{'race': 0.0}]
    favorable_label = 0.0
    unfavorable_label = 1.0

    img_size = 64

    protected_race = []
    outcome_gender = []
    feature_image = []
    feature_age = []

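    # Filenames are expected to encode age, gender and race as the first three
    # underscore-separated fields (e.g. <age>_<gender>_<race>_*.jpg); files that
    # cannot be parsed or read are skipped.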
    for i, image_path in enumerate(glob.glob(os.path.join(image_dir, "*.jpg"))):
        try:
            age, gender, race = os.path.basename(image_path).split("_")[:3]
            age = int(age)
            gender = int(gender)
            race = int(race)

            if race in races_to_consider:
                protected_race.append(race)
                outcome_gender.append(gender)
                # Force RGB so every sample stacks to the same (img_size, img_size, 3) shape.
                feature_image.append(np.array(Image.open(image_path).convert('RGB').resize((img_size, img_size))))
                feature_age.append(age)
        except (ValueError, OSError):
            print("Skipping unreadable or misnamed file: " + image_path)

    feature_image_mat = np.array(feature_image)
    outcome_gender_mat = np.array(outcome_gender)
    protected_race_mat = np.array(protected_race)
    age_mat = np.array(feature_age)

    """ Split the dataset into train and test """

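    # Scale pixel values from [0, 255] to roughly [-1, 1], then take a random
    # 70/30 train/test split, carrying the protected attribute and age along.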
    feature_image_mat_normed = 2.0 * feature_image_mat.astype('float32') / 256.0 - 1.0

    N = len(feature_image_mat_normed)
    ids = np.random.permutation(N)
    train_size = int(0.7 * N)
    X_train = feature_image_mat_normed[ids[0:train_size]]
    y_train = outcome_gender_mat[ids[0:train_size]]
    X_test = feature_image_mat_normed[ids[train_size:]]
    y_test = outcome_gender_mat[ids[train_size:]]

    p_train = protected_race_mat[ids[0:train_size]]
    p_test = protected_race_mat[ids[train_size:]]

    age_train = age_mat[ids[0:train_size]]
    age_test = age_mat[ids[train_size:]]

    batch_size = 64

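    # PyTorch conv layers expect channels-first (N, C, H, W) tensors, so move the
    # channel axis forward before wrapping the arrays in TensorDatasets/DataLoaders.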
    X_train = X_train.transpose(0, 3, 1, 2)
    X_test = X_test.transpose(0, 3, 1, 2)

    train = torch.utils.data.TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    test = torch.utils.data.TensorDataset(torch.FloatTensor(X_test), torch.LongTensor(y_test))
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

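    # Train on a GPU when one is available and print a per-layer summary of the model.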
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = ThreeLayerCNN().to(device)
    # Pass the active device so torchsummary does not assume CUDA is present.
    summary(model, (3, img_size, img_size), device='cuda' if torch.cuda.is_available() else 'cpu')

    """ Training the network """

    num_epochs = 5
    learning_rate = 0.001
    print_freq = 100

    # Specify the loss and the optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Start training the model
    num_batches = len(train_loader)

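    # Standard supervised loop: forward pass, cross-entropy loss, zero the
    # accumulated gradients, backpropagate, and take an optimizer step.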
    for epoch in range(num_epochs):
        for idx, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (idx + 1) % print_freq == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, idx + 1, num_batches, loss.item()))

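    # Switch the model to eval mode and disable gradient tracking; predictions are
    # collected for the accuracy report and for the downstream fairness check.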
    # Run model on test set in eval mode.
    model.eval()
    correct = 0
    y_pred = []
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += predicted.eq(labels.view_as(predicted)).sum().item()
            y_pred += predicted.tolist()
    print('Test_set accuracy: ' + str(100. * correct / len(test_loader.dataset)) + '%')

    # Convert y_pred to a NumPy array.
    y_pred = np.array(y_pred)

    # Save the trained weights (state_dict) so the model can be reloaded for automated serving.
    torch.save(model.state_dict(), result_dir)
    print("Model saved at " + result_dir)

    # Save labels and protected features for fairness check.
    np.savetxt(label_dir + '/y_train.out', y_train)
    np.savetxt(label_dir + '/p_train.out', p_train)
    np.savetxt(label_dir + '/y_test.out', y_test)
    np.savetxt(label_dir + '/p_test.out', p_test)
    np.savetxt(label_dir + '/y_pred.out', y_pred)
    np.save(label_dir + '/x_test', X_test)

    print("Labels stored at directory " + label_dir)