Commit 5b4bf347 authored by 蒙律师

add Model2.py

parent afd471d4
import math

import numpy as np
import torch
from scipy import integrate
from torch import nn
class CustomizedLSTM(nn.Module):
    def __init__(self, x_size, hidden_size, event_class, emb_dim):
        '''
        :param x_size: the dimension of the input feature vector x
        :param hidden_size: the dimension of h
        :param event_class: the number of event classes
        :param emb_dim: the dimension of the event embedding
        '''
        super(CustomizedLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=event_class, embedding_dim=emb_dim)
        # Parameters of the forget gate (the trailing "+ 1" column absorbs the bias term)
        self.Wf = nn.Parameter(torch.Tensor(x_size + hidden_size + 1, hidden_size))
        # self.bf = nn.Parameter(torch.Tensor(hidden_size))
        # Parameters of the input gate
        self.Wi = nn.Parameter(torch.Tensor(x_size + hidden_size + 1, hidden_size))
        # Parameters of the candidate cell state \tilde{c}
        self.Wc = nn.Parameter(torch.Tensor(x_size + hidden_size + 1, hidden_size))
        # Parameters of the time gate
        self.Wtx = nn.Parameter(torch.Tensor(x_size + 1, hidden_size))
        self.Wt = nn.Parameter(torch.Tensor(1 + 1, hidden_size))
        # self.bt = nn.Parameter(torch.Tensor(hidden_size))
        # Parameters of the event gate
        self.Wex = nn.Parameter(torch.Tensor(x_size + 1, hidden_size))
        self.We = nn.Parameter(torch.Tensor(emb_dim + 1, hidden_size))
        # self.be = nn.Parameter(torch.Tensor(hidden_size))
        # Parameters of the delta-feature gate
        self.Wdx = nn.Parameter(torch.Tensor(x_size + 1, hidden_size))
        self.Wd = nn.Parameter(torch.Tensor(x_size + 1, hidden_size))
        # Parameters of the output gate (input: x, h, d_time, event embedding, d_x, bias)
        self.Wo = nn.Parameter(torch.Tensor(2 * x_size + hidden_size + 1 + emb_dim + 1, hidden_size))
        # Parameters of the conditional intensity \lambda
        self.v_t = nn.Parameter(torch.Tensor(hidden_size, 1))
        self.w_t = nn.Parameter(torch.tensor(0.1))
        # Linear layer mapping the hidden state to event-class logits
        self.event_linear = nn.Linear(in_features=hidden_size, out_features=event_class, bias=True)
        self.softplus = nn.Softplus()
        self.reset_Parameter()
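
    # A sketch of the per-step recurrence implemented in forward(), transcribed from the
    # code below as a reading aid (the notation is mine, not the original author's):
    #   f_t  = sigmoid([x_t, h_{t-1}, 1] Wf)                        forget gate
    #   i_t  = sigmoid([x_t, h_{t-1}, 1] Wi)                        input gate
    #   T_t  = sigmoid([x_t, 1] Wtx + sigmoid([dt_t, 1] Wt))        time gate
    #   E_t  = sigmoid([x_t, 1] Wex + sigmoid([e_t, 1] We))         event gate
    #   D_t  = 2 * sigmoid([x_t, 1] Wdx + sigmoid([dx_t, 1] Wd))    delta-feature gate, Wd <= 0
    #   o_t  = sigmoid([x_t, h_{t-1}, dt_t, e_t, dx_t, 1] Wo)       output gate
    #   c~_t = tanh([x_t, h_{t-1}, 1] Wc)
    #   c_s  = f_t * c_{t-1} + i_t * D_t * c~_t                     short-term memory
    #   c_t  = f_t * c_{t-1} + i_t * T_t * E_t * c~_t               long-term memory
    #   h_t  = o_t * tanh(c_s)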
    def forward(self, X, Event, D_time, D_x, init_h=None, init_c=None):
        '''
        :param X: [batch_size, feature_size, seq_len]
        :param Event: [batch_size, seq_len]
        :param D_time: [batch_size, seq_len]
        :param D_x: [batch_size, feature_size, seq_len]
        :param init_h: [batch_size, hidden_size]
        :param init_c: [batch_size, hidden_size]
        :return: h_seq, c_seq, each of shape [batch_size, seq_len, hidden_size]
        '''
        batch_size, sequence_len = Event.shape
        h_lst, c_lst = [], []
        if init_h is None and init_c is None:
            h_last = torch.zeros(batch_size, self.hidden_size)  # .to(x.device)
            c_last = torch.zeros(batch_size, self.hidden_size)  # .to(x.device)
        else:
            h_last = init_h
            c_last = init_c
        AllOnesTensor = torch.ones(batch_size, 1)  # bias column appended to every gate input
        Embed_E = self.embedding(Event.long())     # [batch_size, seq_len, emb_dim]
        for t in range(sequence_len):
            e = Embed_E[:, t, :]
            x_t = X[:, :, t]
            d_time = D_time[:, t].reshape(-1, 1)
            d_x = D_x[:, :, t]
            combine = torch.cat((x_t, h_last, AllOnesTensor), dim=1).float()
            x_t_bias = torch.cat((x_t, AllOnesTensor), dim=1).float()
            d_time_bias = torch.cat((d_time, AllOnesTensor), dim=1).float()
            d_x_bias = torch.cat((d_x, AllOnesTensor), dim=1).float()
            Embed_E_bias = torch.cat((e, AllOnesTensor), dim=1).float()
            # Forget gate:
            f = torch.sigmoid(torch.mm(combine, self.Wf))
            # Input gate:
            i = torch.sigmoid(torch.mm(combine, self.Wi))
            # Time gate:
            T = torch.sigmoid(torch.mm(x_t_bias, self.Wtx) + torch.sigmoid(torch.mm(d_time_bias, self.Wt)))
            # Event gate:
            E = torch.sigmoid(torch.mm(x_t_bias, self.Wex) + torch.sigmoid(torch.mm(Embed_E_bias, self.We)))
            # Delta-feature gate, with the constraint Wd <= 0 applied to a local copy rather
            # than re-wrapping the registered parameter (which would detach it from the optimizer):
            Wd_neg = -torch.relu(-self.Wd)
            D = 2 * torch.sigmoid(torch.mm(x_t_bias, self.Wdx) + torch.sigmoid(torch.mm(d_x_bias, Wd_neg)))
            # Candidate cell state:
            c_helper = torch.tanh(torch.mm(combine, self.Wc))
            # Output gate:
            allvariable = torch.cat((x_t, h_last, d_time, e, d_x, AllOnesTensor), dim=1).float()
            o = torch.sigmoid(torch.mm(allvariable, self.Wo))
            # Short-term cell memory:
            C_s = f * c_last + i * D * c_helper
            # C_s = f * c_last + i * c_helper
            # Long-term cell memory:
            C = f * c_last + i * T * E * c_helper
            # C = f * c_last + i * c_helper
            # Hidden state:
            h = o * torch.tanh(C_s)
            c_last = C
            h_last = h
            h_lst.append(h)
            c_lst.append(C)
        # Stack along a new time dimension so that h_seq[:, t, :] is the hidden state at step t.
        h_seq = torch.stack(h_lst, dim=1)  # [batch_size, seq_len, hidden_size]
        c_seq = torch.stack(c_lst, dim=1)  # [batch_size, seq_len, hidden_size]
        return h_seq, c_seq
    def set_optimizer(self, lr=1e-3, weight_decay=1e-3):
        # from torch.optim import Adam
        # self.optimizer = Adam(self.parameters(), lr=lr, weight_decay=weight_decay)
        from torch.optim import SGD
        self.optimizer = SGD(self.parameters(), lr=lr, weight_decay=weight_decay)  # , momentum=0.9)
        '''
        if use_bert:
            self.optimizer = BertAdam(params=self.parameters(),
                                      lr=self.config.lr,
                                      warmup=0.1,
                                      t_total=total_step)
        else:
        '''
    def reset_Parameter(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def init_Parameter(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def softplus(self, x):
        # Note: shadowed by the nn.Softplus module assigned to self.softplus in __init__.
        return torch.log(1 + torch.exp(x))
    def my_loss(self, target_event, t, t_j, h_j):
        '''
        The loss is -[log P(y_t | h_{t_j}) + log f(t | h_{t_j})], where:
        P(y_t | h_{t_j}): the predicted probability that the entity experiences the
                          ground-truth event y_t at t, given the history up to t_j
        f(t | h_{t_j}): the predicted conditional density that the entity experiences an
                        (unspecified) event at t, given the history up to t_j
        :param target_event: ground-truth event type of the next event (y_t)
        :param t: ground-truth time at which the next event occurs
        :param t_j: the last time the entity experienced an event
        :param h_j: the hidden state summarising the history up to t_j
        :return: loss, log f(t), log lambda(t)
        '''
        event_pred = torch.softmax(self.event_linear(h_j), dim=1)
        event_loss = torch.zeros_like(target_event).float()
        for idx, event_id in enumerate(target_event):
            event_loss[idx] = event_pred[idx, int(event_id.item())]
        event_loss = (-1) * torch.log(event_loss)
        # Use a softplus-transformed copy of w_t so the decay stays positive, without
        # re-wrapping the registered parameter (which would detach it from the optimizer).
        w_t = self.softplus(self.w_t)
        dt = (t - t_j).reshape(-1, 1).float()  # [batch_size, 1], keeps broadcasting with S
        S = torch.mm(h_j, self.v_t)            # [batch_size, 1]
        log_lambda = S + w_t * dt
        log_f_t = S + w_t * dt + (1 / w_t) * (torch.exp(S) - torch.exp(S + w_t * dt))
        time_loss = (-1) * log_f_t
        # time_loss = (t - t_j) ** 2
        loss = torch.mean(time_loss)  # + torch.mean(event_loss)
        return loss, log_f_t, log_lambda
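
    # For reference, the closed-form expressions used in my_loss above, as I read them
    # from the code (in the style of recurrent marked temporal point process models):
    #   log lambda*(t) = v_t^T h_j + w_t (t - t_j)
    #   log f*(t)      = v_t^T h_j + w_t (t - t_j)
    #                    + (1 / w_t) (exp(v_t^T h_j) - exp(v_t^T h_j + w_t (t - t_j)))
    # i.e. f*(t) = lambda*(t) * exp(-integral_{t_j}^{t} lambda*(s) ds) with
    # lambda*(t) = exp(v_t^T h_j + w_t (t - t_j)).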
    def train_batch_all(self, batch):
        '''
        :param batch: tuple of (X, event, time)
            X: [batch_size, feature_size, seq_len]
            event: [batch_size, seq_len]
            time: [batch_size, seq_len]
        :return: train loss, list of log f(t) values, list of log lambda(t) values
        '''
        X, event, time = batch
        X, event, time = torch.tensor(X).float(), torch.tensor(event), torch.tensor(time).float()
        batch_size, feature_size, seq_len = X.shape
        # Inter-event time: time[t] - time[t - 1], with the first interval set to 0.
        D_time = time - torch.cat((torch.zeros(batch_size, 1), time[:, :-1]), dim=1)
        D_time[:, 0] = 0  # Time interval is 0 at the first event.
        # Feature difference: X[t] - X[t - 1], with the first difference set to 0.
        D_X = X - torch.cat((torch.zeros(batch_size, feature_size, 1), X[:, :, :-1]), dim=2)
        D_X[:, :, 0] = 0  # Feature difference is 0 at the first event.
        h_seq, c_seq = self.forward(X, event, D_time, D_X)
        LOSS = None
        log_ft_lst = []
        log_lambda_lst = []
        for t_idx in range(seq_len - 1):
            current_t, current_h = time[:, t_idx], h_seq[:, t_idx, :]
            target_event, target_t = event[:, t_idx + 1], time[:, t_idx + 1]
            loss, log_f_t, log_lambda = self.my_loss(target_event, target_t, current_t, current_h)
            log_ft_lst.append(log_f_t.detach().mean().item())
            log_lambda_lst.append(log_lambda.detach().mean().item())
            if LOSS is None:
                LOSS = loss
            else:
                LOSS += loss
        LOSS /= seq_len
        print('LOSS:', LOSS)
        LOSS.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return LOSS, log_ft_lst, log_lambda_lst
    def calculate_ft(self, t, t_j, h_j, c_j):
        # c_j is kept in the signature for compatibility but, as in my_loss, only the
        # hidden state h_j enters the intensity (v_t has shape [hidden_size, 1]).
        v_t = self.v_t.detach().numpy()
        w_t = self.softplus(self.w_t).detach().item()
        S = np.dot(h_j[np.newaxis, :], v_t)
        log_f_t = S + w_t * (t - t_j) + 1 / w_t * (np.exp(S) - np.exp(S + w_t * (t - t_j)))
        f_t = np.exp(log_f_t)
        return f_t
    def f_t(self, t, t_j, h_j):
        '''
        This function is used to derive the definite integral in equation (13).
        First we generate f^{*}(t) from equation (12),
        then return t * f^{*}(t) to scipy.integrate.quad to get the result.
        :param t: t in equation (12)
        :param t_j: t_j in equation (12)
        :param h_j: h_j in equation (12)
        :return: t * f^{*}(t), i.e. the integrand of the expected next event time
        '''
        v_t = self.v_t.clone().detach().numpy()
        w_t = self.softplus(self.w_t.clone()).detach().item()
        S = np.dot(h_j, v_t)
        log_f_t = S + w_t * (t - t_j) + 1 / w_t * (np.exp(S) - np.exp(S + w_t * (t - t_j)))
        f_t = np.exp(log_f_t)
        return float(t * f_t)
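
    # Equation (13), as used by predict_batch below: the predicted next event time is the
    # expectation under the conditional density,
    #   t_hat = integral_{t_j}^{inf} t * f*(t) dt,
    # which is evaluated numerically with scipy.integrate.quad using f_t() as the integrand.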
    def next_time(self, t_j, h_j, c_j):
        # Numerical alternative to scipy quad: approximate the expected next event time
        # t_j + integral_0^umax u * f^{*}(t_j + u) du with a trapezoidal rule.
        umax = 100  # maximum look-ahead time
        Deltat = umax / 100
        dt = torch.linspace(0, umax, 100 + 1)
        # As in my_loss, only the hidden state h_j enters the intensity
        # (v_t has shape [hidden_size, 1]); c_j is kept in the signature for compatibility.
        S = torch.matmul(h_j, self.v_t).squeeze()
        w_t = self.softplus(self.w_t)
        log_f_t = S + w_t * dt + 1 / w_t * (torch.exp(S) - torch.exp(S + w_t * dt))
        f_t = torch.exp(log_f_t)
        df = dt * f_t
        # Trapezoidal integration of u * f^{*}(t_j + u) over [0, umax].
        integrand_ = ((df[1:] + df[:-1]) * 0.5) * Deltat
        integral_ = torch.sum(integrand_)
        return t_j + integral_
    def predict_batch(self, batch):
        '''
        :param batch: tuple of (X, event, time)
            X: [batch_size, feature_size, seq_len]
            event: [batch_size, seq_len]
            time: [batch_size, seq_len]
        :return: loss, log f(t), predicted times, event accuracy,
                 predicted and ground-truth inter-event times
        '''
        X, event, time = batch
        X = torch.tensor(X).float()
        event = torch.tensor(event)
        time = torch.tensor(time).float()
        batch_size, feature_size, seq_len = X.shape
        # Inter-event time: time[t] - time[t - 1], with the first interval set to 0.
        D_time = time - torch.cat((torch.zeros(batch_size, 1), time[:, :-1]), dim=1)
        D_time[:, 0] = 0  # Time interval is 0 at the first event.
        # Feature difference: X[t] - X[t - 1], with the first difference set to 0.
        D_X = X - torch.cat((torch.zeros(batch_size, feature_size, 1), X[:, :, :-1]), dim=2)
        D_X[:, :, 0] = 0  # Feature difference is 0 at the first event.
        h_seq, c_seq = self.forward(X, event, D_time, D_X)
        LOSS = 0
        predicted_time_list = []
        pred_inter_event_time_lst = []
        gt_inter_event_time_lst = []
        for t_idx in range(seq_len - 1):
            current_t, current_h = time[:, t_idx], h_seq[:, t_idx, :]
            target_event, target_t = event[:, t_idx + 1], time[:, t_idx + 1]
            # Event prediction: most probable class under the softmax over event logits.
            event_pred = torch.argmax(torch.softmax(self.event_linear(current_h), dim=1), dim=1)
            event_acc = torch.sum((event_pred - target_event) == 0).float() / batch_size
            pred_time = []
            OFFSET = 0
            for idx, t_j in enumerate(current_t):
                t_j = t_j.clone().detach().numpy()
                h_j = current_h[idx].clone().detach().numpy()
                # predicted_time = self.next_time(t_j, h_j)
                # Expected next event time: integrate t * f^{*}(t) from t_j to infinity, equation (13).
                predicted_time, err = integrate.quad(self.f_t, t_j, np.inf, args=(t_j, h_j))
                pred_inter_event_time_lst.append(predicted_time - t_j)
                gt_inter_event_time_lst.append(target_t[idx] - t_j)
                offset = predicted_time - target_t[idx]
                OFFSET += offset
                pred_time.append(predicted_time)
            loss, log_f_t, log_lambda_t = self.my_loss(target_event, target_t, current_t, current_h)
            LOSS += loss
            predicted_time_list.append(pred_time)
        LOSS /= seq_len
        print('The offset between predicted time and ground-truth time is:', OFFSET)
        print(pred_inter_event_time_lst)
        return LOSS, log_f_t, predicted_time_list, event_acc, pred_inter_event_time_lst, gt_inter_event_time_lst
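

if __name__ == '__main__':
    # A minimal smoke test on randomly generated data. The sizes below (batch, features,
    # sequence length, event classes) are illustrative assumptions only and are not taken
    # from the original training script.
    torch.manual_seed(0)
    batch_size, feature_size, seq_len, n_events, emb_dim, hidden = 1, 4, 6, 3, 5, 8
    model = CustomizedLSTM(x_size=feature_size, hidden_size=hidden,
                           event_class=n_events, emb_dim=emb_dim)
    model.set_optimizer(lr=1e-3)
    X = np.random.randn(batch_size, feature_size, seq_len).astype(np.float32)
    event = np.random.randint(0, n_events, size=(batch_size, seq_len))
    time = np.sort(np.random.rand(batch_size, seq_len).astype(np.float32), axis=1)
    loss, log_ft, log_lambda = model.train_batch_all((X, event, time))
    print('training loss:', loss.item())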