Skip to content

Commit

Permalink
feat: Add timeout and LLU timeout (#761)
Browse files Browse the repository at this point in the history
Add support for layers to have timeout.

If a frame goes past its hard timeout, it gets killed.

LLU timeout is usually a lower value that checks when the last log
update happened. If no update happens within the LLU window, the frame
is also killed.

Closes #462
  • Loading branch information
larsbijl committed Jan 23, 2021
1 parent 68f7c1c commit dff882c
Show file tree
Hide file tree
Showing 37 changed files with 774 additions and 35 deletions.
2 changes: 1 addition & 1 deletion VERSION.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4
0.5
1 change: 1 addition & 0 deletions cuebot/src/main/java/com/imageworks/spcue/FrameDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ public class FrameDetail extends FrameEntity implements FrameInterface {
public Timestamp dateStarted;
public Timestamp dateStopped;
public Timestamp dateUpdated;
public Timestamp dateLLU;
}

18 changes: 18 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class LayerDetail extends LayerEntity implements LayerInterface {
public long minimumMemory;
public long minimumGpu;
public int chunkSize;
public int timeout;
public int timeout_llu;
public int dispatchOrder;
public int totalFrameCount;

Expand Down Expand Up @@ -90,6 +92,22 @@ public void setThreadable(boolean isThreadable) {
this.isThreadable = isThreadable;
}

/**
 * Returns the timeout stored on this layer.
 *
 * @return the current value of the {@code timeout} field
 */
public int getTimeout() {
    return this.timeout;
}

/**
 * Sets the timeout stored on this layer.
 *
 * @param timeout the new value for the {@code timeout} field
 */
public void setTimeout(int timeout) {
this.timeout = timeout;
}

/**
 * Returns the LLU (last log update) timeout stored on this layer.
 *
 * @return the current value of the {@code timeout_llu} field, i.e. the
 *         value written by {@link #setTimeoutLLU(int)}
 */
public int getTimeoutLLU() {
    // Must read timeout_llu, not timeout: the two values are stored separately.
    return timeout_llu;
}

/**
 * Sets the LLU (last log update) timeout stored on this layer.
 *
 * @param timeout_llu the new value for the {@code timeout_llu} field
 */
public void setTimeoutLLU(int timeout_llu) {
this.timeout_llu = timeout_llu;
}

/**
 * Returns the minimum memory value stored on this layer.
 *
 * @return the current value of the {@code minimumMemory} field
 */
public long getMinimumMemory() {
    return this.minimumMemory;
}
Expand Down
4 changes: 4 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/ServiceEntity.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,9 @@ public class ServiceEntity extends Entity {
*/
public LinkedHashSet<String> tags = new LinkedHashSet<String>();

/**
* Timeout value for the service; defaults to 0.
* NOTE(review): the unit and the meaning of 0 are not visible here - confirm.
*/
public int timeout = 0;

/**
* LLU (last log update) timeout value for the service; defaults to 0.
* NOTE(review): the unit and the meaning of 0 are not visible here - confirm.
*/
public int timeout_llu = 0;

}

13 changes: 13 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/FrameDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,19 @@ boolean updateFrameStopped(FrameInterface frame, FrameState state, int exitStatu
*/
ResourceUsage getResourceUsage(FrameInterface f);

/**
* Update the last-log-update (LLU) time recorded for the given frame.
*
* NOTE(review): the text previously here was shared with the memory
* usage update and said the frame must be in the Running state and
* that the call aborts if the frame is locked by another thread.
* Confirm whether either condition applies to this method.
*
* @param f the frame to update
* @param lluTime the time of the last log update; presumably
* seconds since the epoch - confirm against the caller
* @throws FrameReservationException if the frame is locked
* by another thread. NOTE(review): carried over from the previous
* comment; confirm that implementations actually throw this.
*/
void updateFrameUsage(FrameInterface f, long lluTime);

/**
* Update memory usage values for the given frame. The
* frame must be in the Running state. If the frame
Expand Down
18 changes: 18 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,24 @@ public interface LayerDao {
*/
void updateThreadable(LayerInterface layer, boolean threadable);

/**
* Update a layer's timeout value, which limits how
* long a frame can run on a host.
*
* @param layer the layer whose timeout is updated
* @param timeout the new timeout value
* (NOTE(review): unit is not visible here - confirm)
*/
void updateTimeout(LayerInterface layer, int timeout);

/**
* Update a layer's LLU timeout value, which limits how
* long a frame can run on a host without updates in the log file.
*
* @param layer the layer whose LLU timeout is updated
* @param timeout_llu the new LLU timeout value
* (NOTE(review): unit is not visible here - confirm)
*/
void updateTimeoutLLU(LayerInterface layer, int timeout_llu);

/**
* Lowers the minimum memory on a layer if the layer
* is using less memory and the current min memory is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.sql.Timestamp;
import java.util.Optional;

import org.springframework.jdbc.core.RowMapper;
Expand Down Expand Up @@ -990,6 +991,21 @@ public ResourceUsage getResourceUsage(FrameInterface f) {
"pk_frame = ?", RESOURCE_USAGE_MAPPER, f.getFrameId());
}

/**
 * Statement that refreshes ts_updated and stores a new ts_llu value
 * for the frame row selected by pk_frame.
 */
private static final String UPDATE_FRAME_IO_USAGE =
    "UPDATE frame " +
    "SET " +
        "ts_updated = current_timestamp," +
        "ts_llu = ? " +
    "WHERE pk_frame = ? ";

/**
 * Writes the last-log-update time for a frame and bumps its ts_updated
 * column to the database's current timestamp.
 *
 * @param f the frame whose row is updated (matched on its frame id)
 * @param lluTime multiplied by 1000 to build the ts_llu timestamp, so it
 *        is presumably expressed in epoch seconds - confirm against caller
 */
@Override
public void updateFrameUsage(FrameInterface f, long lluTime) {
    final Timestamp lastLogUpdate = new Timestamp(lluTime * 1000L);
    getJdbcTemplate().update(UPDATE_FRAME_IO_USAGE, lastLogUpdate, f.getFrameId());
}

private static final String UPDATE_FRAME_MEMORY_USAGE =
"UPDATE " +
"frame " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,20 @@ public void updateThreadable(LayerInterface layer, boolean threadable) {
threadable, layer.getLayerId());
}

/**
 * Stores a new int_timeout value on the layer row matching the
 * given layer's id.
 *
 * @param layer the layer to update
 * @param timeout the value written to int_timeout
 */
@Override
public void updateTimeout(LayerInterface layer, int timeout) {
    final String sql = "UPDATE layer SET int_timeout=? WHERE pk_layer=?";
    getJdbcTemplate().update(sql, timeout, layer.getLayerId());
}

/**
 * Stores a new int_timeout_llu value on the layer row matching the
 * given layer's id.
 *
 * @param layer the layer to update
 * @param timeout_llu the value written to int_timeout_llu
 */
@Override
public void updateTimeoutLLU(LayerInterface layer, int timeout_llu) {
    final String sql = "UPDATE layer SET int_timeout_llu=? WHERE pk_layer=?";
    getJdbcTemplate().update(sql, timeout_llu, layer.getLayerId());
}

@Override
public void enableMemoryOptimizer(LayerInterface layer, boolean value) {
getJdbcTemplate().update(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.sql.Timestamp;
import java.util.Optional;

import org.springframework.jdbc.core.RowMapper;
Expand Down Expand Up @@ -397,6 +398,7 @@ public FrameDetail mapRow(ResultSet rs, int rowNum) throws SQLException {
frame.dateStarted = rs.getTimestamp("ts_started");
frame.dateStopped = rs.getTimestamp("ts_stopped");
frame.dateUpdated = rs.getTimestamp("ts_updated");
frame.dateLLU = rs.getTimestamp("ts_llu");
frame.version = rs.getInt("int_version");

if (rs.getString("str_host") != null) {
Expand Down Expand Up @@ -472,9 +474,10 @@ public boolean isOrphan(FrameInterface frame) {
"int_number, " +
"int_dispatch_order, " +
"int_layer_order, "+
"ts_updated "+
"ts_updated, "+
"ts_llu "+
") " +
"VALUES (?,?,?,?,?,?,?,?,current_timestamp)";
"VALUES (?,?,?,?,?,?,?,?,current_timestamp,current_timestamp)";

@Override
public void insertFrames(LayerDetail layer, List<Integer> frames) {
Expand Down Expand Up @@ -692,6 +695,7 @@ public boolean updateFrameState(FrameInterface frame, FrameState state) {
"SET " +
"str_state=?, " +
"ts_updated = current_timestamp, " +
"ts_llu = current_timestamp, " +
"int_depend_count = 0, " +
"int_version = int_version + 1 " +
"WHERE " +
Expand Down Expand Up @@ -965,6 +969,21 @@ public ResourceUsage getResourceUsage(FrameInterface f) {
"pk_frame = ?", RESOURCE_USAGE_MAPPER, f.getFrameId());
}

/**
 * Statement that refreshes ts_updated and stores a new ts_llu value
 * for the frame row selected by pk_frame.
 */
private static final String UPDATE_FRAME_IO_USAGE =
    "UPDATE frame " +
    "SET " +
        "ts_updated = current_timestamp," +
        "ts_llu = ? " +
    "WHERE pk_frame = ? ";

/**
 * Writes the last-log-update time for a frame and bumps its ts_updated
 * column to the database's current timestamp.
 *
 * @param f the frame whose row is updated (matched on its frame id)
 * @param lluTime multiplied by 1000 to build the ts_llu timestamp, so it
 *        is presumably expressed in epoch seconds - confirm against caller
 */
@Override
public void updateFrameUsage(FrameInterface f, long lluTime) {
    final Timestamp lastLogUpdate = new Timestamp(lluTime * 1000L);
    getJdbcTemplate().update(UPDATE_FRAME_IO_USAGE, lastLogUpdate, f.getFrameId());
}

private static final String UPDATE_FRAME_MEMORY_USAGE =
"UPDATE " +
"frame " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ public LayerDetail mapRow(ResultSet rs, int rowNum) throws SQLException {
rs.getString("str_tags").replaceAll(" ", "").split("\\|"));
layer.services.addAll(
Lists.newArrayList(rs.getString("str_services").split(",")));
layer.timeout = rs.getInt("int_timeout");
layer.timeout_llu = rs.getInt("int_timeout_llu");
return layer;
}
};
Expand Down Expand Up @@ -310,9 +312,11 @@ public LayerInterface getLayer(String id) {
"b_threadable, " +
"int_mem_min, " +
"int_gpu_min, " +
"str_services " +
"str_services, " +
"int_timeout," +
"int_timeout_llu " +
") " +
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";

@Override
public void insertLayerDetail(LayerDetail l) {
Expand All @@ -322,7 +326,8 @@ public void insertLayerDetail(LayerDetail l) {
l.range, l.chunkSize, l.dispatchOrder,
StringUtils.join(l.tags," | "), l.type.toString(),
l.minimumCores, l.maximumCores, l.isThreadable,
l.minimumMemory, l.minimumGpu, StringUtils.join(l.services,","));
l.minimumMemory, l.minimumGpu, StringUtils.join(l.services,","),
l.timeout, l.timeout_llu);
}

@Override
Expand Down Expand Up @@ -623,6 +628,20 @@ public void updateThreadable(LayerInterface layer, boolean threadable) {
threadable, layer.getLayerId());
}

/**
 * Stores a new int_timeout value on the layer row matching the
 * given layer's id.
 *
 * @param layer the layer to update
 * @param timeout the value written to int_timeout
 */
@Override
public void updateTimeout(LayerInterface layer, int timeout) {
    final String sql = "UPDATE layer SET int_timeout=? WHERE pk_layer=?";
    getJdbcTemplate().update(sql, timeout, layer.getLayerId());
}

/**
 * Stores a new int_timeout_llu value on the layer row matching the
 * given layer's id.
 *
 * @param layer the layer to update
 * @param timeout_llu the value written to int_timeout_llu
 */
@Override
public void updateTimeoutLLU(LayerInterface layer, int timeout_llu) {
    final String sql = "UPDATE layer SET int_timeout_llu=? WHERE pk_layer=?";
    getJdbcTemplate().update(sql, timeout_llu, layer.getLayerId());
}

@Override
public void enableMemoryOptimizer(LayerInterface layer, boolean value) {
getJdbcTemplate().update(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ public ServiceEntity mapRow(ResultSet rs, int rowNum) throws SQLException {
s.minGpu = rs.getLong("int_gpu_min");
s.threadable = rs.getBoolean("b_threadable");
s.tags = splitTags(rs.getString("str_tags"));
s.timeout = rs.getInt("int_timeout");
s.timeout_llu = rs.getInt("int_timeout_llu");
return s;
}
};
Expand All @@ -81,6 +83,8 @@ public ServiceOverrideEntity mapRow(ResultSet rs, int rowNum)
s.threadable = rs.getBoolean("b_threadable");
s.tags = splitTags(rs.getString("str_tags"));
s.showId = rs.getString("pk_show");
s.timeout = rs.getInt("int_timeout");
s.timeout_llu = rs.getInt("int_timeout_llu");
return s;
}
};
Expand All @@ -94,7 +98,9 @@ public ServiceOverrideEntity mapRow(ResultSet rs, int rowNum)
"service.int_cores_max," +
"service.int_mem_min," +
"service.int_gpu_min," +
"service.str_tags " +
"service.str_tags, " +
"service.int_timeout, " +
"service.int_timeout_llu " +
"FROM " +
"service ";

Expand All @@ -114,7 +120,9 @@ public ServiceEntity get(String id) {
"show_service.int_cores_max, "+
"show_service.int_mem_min," +
"show_service.int_gpu_min," +
"show_service.str_tags, " +
"show_service.str_tags," +
"show_service.int_timeout," +
"show_service.int_timeout_llu," +
"show.pk_show " +
"FROM " +
"show_service," +
Expand Down Expand Up @@ -160,16 +168,19 @@ public boolean isOverridden(String service, String show) {
"int_cores_max, "+
"int_mem_min," +
"int_gpu_min," +
"str_tags" +
") VALUES (?,?,?,?,?,?,?,?)";
"str_tags," +
"int_timeout," +
"int_timeout_llu " +
") VALUES (?,?,?,?,?,?,?,?,?,?)";

@Override
public void insert(ServiceEntity service) {
service.id = SqlUtil.genKeyRandom();
getJdbcTemplate().update(INSERT_SERVICE, service.id,
service.name, service.threadable, service.minCores,
service.maxCores, service.minMemory, service.minGpu,
StringUtils.join(service.tags.toArray(), " | "));
StringUtils.join(service.tags.toArray(), " | "),
service.timeout, service.timeout_llu);
}

private static final String INSERT_SERVICE_WITH_SHOW =
Expand All @@ -184,16 +195,19 @@ public void insert(ServiceEntity service) {
"int_cores_max," +
"int_mem_min," +
"int_gpu_min," +
"str_tags " +
") VALUES (?,?,?,?,?,?,?,?,?)";
"str_tags," +
"int_timeout," +
"int_timeout_llu " +
") VALUES (?,?,?,?,?,?,?,?,?,?,?)";

@Override
public void insert(ServiceOverrideEntity service) {
service.id = SqlUtil.genKeyRandom();
getJdbcTemplate().update(INSERT_SERVICE_WITH_SHOW, service.id,
service.showId, service.name, service.threadable,
service.minCores, service.maxCores, service.minMemory,
service.minGpu, joinTags(service.tags));
service.minGpu, joinTags(service.tags),
service.timeout, service.timeout_llu);
}

private static final String UPDATE_SERVICE =
Expand All @@ -206,7 +220,9 @@ public void insert(ServiceOverrideEntity service) {
"int_cores_max=?,"+
"int_mem_min=?," +
"int_gpu_min=?," +
"str_tags=? " +
"str_tags=?," +
"int_timeout=?," +
"int_timeout_llu=? " +
"WHERE " +
"pk_service = ?";

Expand All @@ -215,7 +231,7 @@ public void update(ServiceEntity service) {
getJdbcTemplate().update(UPDATE_SERVICE, service.name,
service.threadable, service.minCores, service.maxCores,
service.minMemory, service.minGpu, joinTags(service.tags),
service.getId());
service.timeout, service.timeout_llu, service.getId());
}

private static final String UPDATE_SERVICE_WITH_SHOW =
Expand All @@ -228,7 +244,9 @@ service.minMemory, service.minGpu, joinTags(service.tags),
"int_cores_max=?," +
"int_mem_min=?," +
"int_gpu_min=?," +
"str_tags=? " +
"str_tags=?," +
"int_timeout=?," +
"int_timeout_llu=? " +
"WHERE " +
"pk_show_service = ?";

Expand All @@ -237,7 +255,7 @@ public void update(ServiceOverrideEntity service) {
getJdbcTemplate().update(UPDATE_SERVICE_WITH_SHOW, service.name,
service.threadable, service.minCores, service.maxCores,
service.minMemory, service.minGpu, joinTags(service.tags),
service.getId());
service.timeout, service.timeout_llu, service.getId());
}

@Override
Expand Down

0 comments on commit dff882c

Please sign in to comment.