Skip to content

Commit

Permalink
#136, #120, #125 - fixes to checkpoint restart, moncs_per_io.
Browse files Browse the repository at this point in the history
This has been fully tested with the Cray and GNU compilers on MONSOON; see https://code.metoffice.gov.uk/trac/monc/wiki/MoncMain/MoncTesting#trunkr2460withCASIMoptimisationsOOMfixMONCS_per_IOfixsee136120125 for details and the test report.

Merged into /main/trunk: /main/branches/dev/adrianhill/r2460_io_oom_fix@2468 cf. /main/trunk@2460


git-svn-id: https://code.metoffice.gov.uk/svn/monc/main/trunk@2477 0f676ef4-b20c-4647-9485-21614760d15f
  • Loading branch information
adrianhill committed Jan 31, 2017
1 parent bc213b8 commit 7dacb6c
Show file tree
Hide file tree
Showing 59 changed files with 883 additions and 466 deletions.
6 changes: 5 additions & 1 deletion components/checkpointer/src/writecheckpoint.F90
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,17 @@ subroutine write_checkpoint_file(current_state, filename)
dtm_id, dtm_new_id, absolute_new_dtm_id
logical :: q_indices_declared

! If there are multiple processes then open for parallel IO
#ifdef SINGLE_MONC_DO_SEQUENTIAL_NETCDF
if (current_state%parallel%processes .gt. 1) then
call check_status(nf90_create(filename, ior(NF90_NETCDF4, NF90_MPIIO), ncid, &
comm = current_state%parallel%monc_communicator, info = MPI_INFO_NULL))
else
call check_status(nf90_create(filename, NF90_CLOBBER, ncid))
end if
#else
call check_status(nf90_create(filename, ior(NF90_NETCDF4, NF90_MPIIO), ncid, &
comm = current_state%parallel%monc_communicator, info = MPI_INFO_NULL))
#endif
call write_out_global_attributes(ncid)
call define_grid_dimensions(current_state, ncid, z_dim_id, y_dim_id, x_dim_id)
if (current_state%number_q_fields .gt. 0) call define_q_field_dimension(current_state, ncid, q_dim_id)
Expand Down
2 changes: 1 addition & 1 deletion fcm-make/casim.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ extract.path-excl[monc] = / components/casim/src/casim_stub.F90 components/petsc
extract.path-incl[monc] = components model_core io misc testcases monc_driver.F90

extract.location{primary}[casim] = fcm:casim.x_tr
$casim_revision{?} =
$casim_revision{?} = 2448
extract.location[casim] = @$casim_revision
extract.location{diff}[casim] =
extract.path-incl[casim] = src
Expand Down
2 changes: 1 addition & 1 deletion fcm-make/casim_mirror.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ extract.path-excl[monc] = / components/casim/src/casim_stub.F90 components/petsc
extract.path-incl[monc] = components model_core io misc testcases monc_driver.F90

extract.location{primary}[casim] = fcm:casim.xm_tr
$casim_revision{?} = vn0.1
$casim_revision{?} = 2448
extract.location[casim] = @$casim_revision
extract.location{diff}[casim] =
extract.path-incl[casim] = src
Expand Down
2 changes: 1 addition & 1 deletion io/io_cfg_files/checkpoint.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
</group>

<data-writing>
<file name="{checkpoint_file}" write_timestep_frequency="{checkpoint_frequency}" title="Checkpoint" write_on_terminate="true">
<file name="{checkpoint_file}" write_timestep_frequency="{checkpoint_frequency}" title="Checkpoint" write_on_terminate="true" store_state="false">
<include group="checkpoint_fields" time_manipulation="none"/>
<include state="io"/>
</file>
Expand Down
10 changes: 9 additions & 1 deletion io/src/configurationparser.F90
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ module configuration_parser_mod
character(len=STRING_LENGTH) :: file_name, title
integer :: number_of_contents, write_timestep_frequency
real :: write_time_frequency
logical :: write_on_model_time, write_on_terminate
logical :: write_on_model_time, write_on_terminate, include_in_io_state_write
type(io_configuration_file_writer_facet_type), dimension(:), allocatable :: contents
end type io_configuration_file_writer_type

Expand Down Expand Up @@ -633,6 +633,14 @@ subroutine define_file_writer(attribute_names, attribute_values)
else
building_config%file_writers(current_building_file_writer)%write_on_terminate=.false.
end if

field_index=get_field_index_from_name(attribute_names, "store_state")
if (field_index .gt. 0) then
building_config%file_writers(current_building_file_writer)%include_in_io_state_write=&
retrieve_string_value(attribute_values(field_index), STRING_DATA_TYPE) == "true"
else
building_config%file_writers(current_building_file_writer)%include_in_io_state_write=.true.
end if

building_config%file_writers(current_building_file_writer)%number_of_contents=0
allocate(building_config%file_writers(current_building_file_writer)%contents(DATA_SIZE_STRIDE))
Expand Down
261 changes: 187 additions & 74 deletions io/src/io_state_reader.F90

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions io/src/ioserver.F90
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ module io_server_mod
use global_callback_inter_io_mod, only : perform_global_callback
use logging_mod, only : LOG_ERROR, LOG_WARN, log_log, initialise_logging
use mpi, only : MPI_COMM_WORLD, MPI_STATUSES_IGNORE, MPI_BYTE
use io_server_state_reader_mod, only : read_io_server_state
use io_server_state_reader_mod, only : read_io_server_configuration
implicit none

#ifndef TEST_MODE
Expand Down Expand Up @@ -74,7 +74,8 @@ subroutine io_server_run(options_database, io_communicator_arg, &

if (continuation_run) then
! Handle case where we need to allocate this due to no IO server config
call read_io_server_state(options_get_string(options_database, "checkpoint"), io_xml_configuration, io_communicator_arg)
call read_io_server_configuration(options_get_string(options_database, "checkpoint"), &
io_xml_configuration, io_communicator_arg)
end if

if (.not. allocated(io_xml_configuration)) then
Expand Down
16 changes: 7 additions & 9 deletions io/src/writers/file_types/netcdf_filetype.F90
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,8 @@ subroutine define_netcdf_file(io_configuration, file_writer_information, timeste
end if
call check_thread_status(forthread_mutex_lock(netcdf_mutex))
call lock_mpi()
if (io_configuration%number_of_io_servers .gt. 1) then
call check_netcdf_status(nf90_create(unique_filename, ior(NF90_NETCDF4, NF90_MPIIO), ncdf_writer_state%ncid, &
comm = io_configuration%io_communicator, info = MPI_INFO_NULL))
else
call check_netcdf_status(nf90_create(unique_filename, NF90_CLOBBER, ncdf_writer_state%ncid))
end if
call check_netcdf_status(nf90_create(unique_filename, ior(NF90_NETCDF4, NF90_MPIIO), ncdf_writer_state%ncid, &
comm = io_configuration%io_communicator, info = MPI_INFO_NULL))
call unlock_mpi()
call write_out_global_attributes(ncdf_writer_state%ncid, file_writer_information, timestep, time)
call define_dimensions(ncdf_writer_state, io_configuration%dimension_sizing)
Expand All @@ -123,8 +119,10 @@ end subroutine define_netcdf_file
!! @param io_configuration The IO server configuration
!! @param file_writer_information The file writer information
!! @param timestep The write timestep
subroutine store_io_server_state(io_configuration, file_writer_information, timestep)
subroutine store_io_server_state(io_configuration, writer_entries, time_points, file_writer_information, timestep)
type(io_configuration_type), intent(inout) :: io_configuration
type(writer_type), volatile, dimension(:), intent(inout) :: writer_entries
type(hashmap_type), volatile, intent(inout) :: time_points
type(writer_type), intent(inout), target :: file_writer_information
integer, intent(in) :: timestep

Expand All @@ -134,11 +132,11 @@ subroutine store_io_server_state(io_configuration, file_writer_information, time
call lock_mpi()
call check_netcdf_status(nf90_redef(ncdf_writer_state%ncid))
call unlock_mpi()
call define_io_server_state_contributions(io_configuration, ncdf_writer_state)
call define_io_server_state_contributions(io_configuration, writer_entries, time_points, ncdf_writer_state)
call lock_mpi()
call check_netcdf_status(nf90_enddef(ncdf_writer_state%ncid))
call unlock_mpi()
call write_io_server_state(io_configuration, ncdf_writer_state)
call write_io_server_state(io_configuration, writer_entries, time_points, ncdf_writer_state)
end subroutine store_io_server_state

!> Looks up and retrieves the writer entry that corresponds to this NetCDF file state
Expand Down
Loading

0 comments on commit 7dacb6c

Please sign in to comment.