From bd9af2a5b0755d9e4aa01f52581db54b421c568b Mon Sep 17 00:00:00 2001 From: Zhiyuan Wang Date: Wed, 27 Mar 2024 17:09:45 -0700 Subject: Add test to reproduce "Resource deadlock would occur" in multi process DataStores Bug: 327611578 Test: ./gradlew :datastore:datastore-core:connectedAndroidTest (cherry picked from https://android-review.googlesource.com/q/commit:6dd3f37a049d4541eb239be0f5cfe7fd4ed86bcb) Merged-In: I6a5158c2ff25e28ec2dc0ed156a40fa29880fe42 Change-Id: I6a5158c2ff25e28ec2dc0ed156a40fa29880fe42 --- .../multiprocess/MultiProcessDataStoreIpcTest.kt | 114 +++++++++++++++++++++ .../core/multiprocess/ipcActions/SetTextAction.kt | 2 + 2 files changed, 116 insertions(+) diff --git a/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt b/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt index c3351b393d2..811c9f76994 100644 --- a/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt +++ b/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt @@ -30,6 +30,7 @@ import androidx.datastore.core.twoWayIpc.IpcAction import androidx.datastore.core.twoWayIpc.IpcUnit import androidx.datastore.core.twoWayIpc.TwoWayIpcSubject import androidx.datastore.testing.TestMessageProto.FooProto +import androidx.kruth.assertThrows import com.google.common.truth.Truth.assertThat import kotlinx.coroutines.CompletableDeferred import kotlinx.coroutines.CoroutineScope @@ -470,4 +471,117 @@ class MultiProcessDataStoreIpcTest { "remoteValue" ) } + + @Test + fun testConcurrentUpdateNoDeadlock_file() = testConcurrentUpdateNoDeadlock(StorageVariant.FILE) + + @Test + fun testConcurrentUpdateNoDeadlock_okio() = testConcurrentUpdateNoDeadlock(StorageVariant.OKIO) + + /** + * Reproduce the false alarm on deadlock by Linux. It happens in the case where:
+ * 1. process A holds file lock 1;
+ * 2. process B holds file lock 2;
+ * 3. process B (could be another thread than 2.) waits to hold file lock 1 (still held by A);
+ * 4. process A (could be another thread than 1.) waits to hold file lock 2 (still held by B) - + * exception "Resource deadlock would occur" is thrown - caught and retried with exponential + * backoff. + */ + private fun testConcurrentUpdateNoDeadlock( + storageVariant: StorageVariant + ) = multiProcessRule.runTest { + val connection = multiProcessRule.createConnection() + val subject1 = connection.createSubject( + multiProcessRule.datastoreScope + ) + val subject2 = connection.createSubject( + multiProcessRule.datastoreScope + ) + + val file1 = tmpFolder.newFile() + val file2 = tmpFolder.newFile() + val datastore1 = createMultiProcessTestDatastore( + filePath = file1.canonicalPath, + storageVariant = storageVariant, + hostDatastoreScope = multiProcessRule.datastoreScope, + subjects = arrayOf(subject1) + ) + val datastore2 = createMultiProcessTestDatastore( + filePath = file2.canonicalPath, + storageVariant = storageVariant, + hostDatastoreScope = multiProcessRule.datastoreScope, + subjects = arrayOf(subject2) + ) + + // setup real data and lock file + datastore1.updateData { + it.toBuilder().setText("hostData").build() + } + datastore2.updateData { + it.toBuilder().setText("hostData").build() + } + + // process A holds file lock 1 + val blockWrite = CompletableDeferred() + val startedWrite = CompletableDeferred() + + val localUpdate1 = async { + datastore1.updateData { + startedWrite.complete(Unit) + blockWrite.await() + it.toBuilder().setInteger(3).build() + } + } + startedWrite.await() + + // process B holds file lock 2 + val commitWriteLatch1 = InterProcessCompletable() + val writeStartedLatch1 = InterProcessCompletable() + val setTextAction1 = async { + subject2.invokeInRemoteProcess( + SetTextAction( + value = "remoteValue", + commitTransactionLatch = commitWriteLatch1, + transactionStartedLatch = writeStartedLatch1 + ) + ) + } + writeStartedLatch1.await(subject2) + + // process B (could be another thread than 2.) waits to hold file lock 1 + val commitWriteLatch2 = InterProcessCompletable() + val actionStartedLatch = InterProcessCompletable() + val setTextAction2 = async { + subject1.invokeInRemoteProcess( + SetTextAction( + value = "remoteValue", + commitTransactionLatch = commitWriteLatch2, + actionStartedLatch = actionStartedLatch + ) + ) + } + actionStartedLatch.await(subject1) + // wait a bit to let the other process get into updateData, might be flaky + delay(100) + + // process A (could be another thread than 1.) waits to hold file lock 2 (still held by B) + assertThrows { + datastore2.updateData { + it.toBuilder().setInteger(4).build() + } + }.hasMessageThat() + .contains("Resource deadlock would occur") + + blockWrite.complete(Unit) + commitWriteLatch1.complete(subject2, IpcUnit) + commitWriteLatch2.complete(subject1, IpcUnit) + + setTextAction1.await() + setTextAction2.await() + localUpdate1.await() + + assertThat(datastore1.data.first().text).isEqualTo("remoteValue") + assertThat(datastore1.data.first().integer).isEqualTo(3) + assertThat(datastore2.data.first().text).isEqualTo("remoteValue") + } } diff --git a/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/ipcActions/SetTextAction.kt b/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/ipcActions/SetTextAction.kt index f442d6cabbb..54440d2d080 100644 --- a/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/ipcActions/SetTextAction.kt +++ b/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/ipcActions/SetTextAction.kt @@ -28,10 +28,12 @@ internal class SetTextAction( private val value: String, private val transactionStartedLatch: InterProcessCompletable? = null, private val commitTransactionLatch: InterProcessCompletable? = null, + private val actionStartedLatch: InterProcessCompletable? = null, ) : IpcAction(), Parcelable { override suspend fun invokeInRemoteProcess( subject: TwoWayIpcSubject ): IpcUnit { + actionStartedLatch?.complete(subject, IpcUnit) subject.datastore.updateData { transactionStartedLatch?.complete(subject, IpcUnit) commitTransactionLatch?.await(subject) -- cgit v1.2.3 From aebc4a0d863cd046522bfdd843ff468d0b29710a Mon Sep 17 00:00:00 2001 From: Zhiyuan Wang Date: Wed, 27 Mar 2024 17:10:27 -0700 Subject: Add exponential backoff to MultiProcessCoordinator fileLock to mitigate Linux false alarm on "Resource deadlock would occur" error Bug: 327611578 Test: ./gradlew :datastore:datastore-core:connectedAndroidTest (cherry picked from https://android-review.googlesource.com/q/commit:859f45bc47fcc70c52b4a82bc2b09246b3fe00c7) Merged-In: I3b702bd2d0d41f58dc6691de2466a50943256f51 Change-Id: I3b702bd2d0d41f58dc6691de2466a50943256f51 --- .../multiprocess/MultiProcessDataStoreIpcTest.kt | 8 +++--- .../core/MultiProcessCoordinator.android.kt | 32 ++++++++++++++++++++-- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt b/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt index 811c9f76994..0da13913fb5 100644 --- a/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt +++ b/datastore/datastore-core/src/androidInstrumentedTest/kotlin/androidx/datastore/core/multiprocess/MultiProcessDataStoreIpcTest.kt @@ -30,7 +30,6 @@ import androidx.datastore.core.twoWayIpc.IpcAction import androidx.datastore.core.twoWayIpc.IpcUnit import androidx.datastore.core.twoWayIpc.TwoWayIpcSubject import androidx.datastore.testing.TestMessageProto.FooProto -import androidx.kruth.assertThrows import com.google.common.truth.Truth.assertThat import kotlinx.coroutines.CompletableDeferred import kotlinx.coroutines.CoroutineScope @@ -565,12 +564,11 @@ class MultiProcessDataStoreIpcTest { delay(100) // process A (could be another thread than 1.) waits to hold file lock 2 (still held by B) - assertThrows { + val localUpdate2 = async { datastore2.updateData { it.toBuilder().setInteger(4).build() } - }.hasMessageThat() - .contains("Resource deadlock would occur") + } blockWrite.complete(Unit) commitWriteLatch1.complete(subject2, IpcUnit) @@ -579,9 +577,11 @@ class MultiProcessDataStoreIpcTest { setTextAction1.await() setTextAction2.await() localUpdate1.await() + localUpdate2.await() assertThat(datastore1.data.first().text).isEqualTo("remoteValue") assertThat(datastore1.data.first().integer).isEqualTo(3) assertThat(datastore2.data.first().text).isEqualTo("remoteValue") + assertThat(datastore2.data.first().integer).isEqualTo(4) } } diff --git a/datastore/datastore-core/src/androidMain/kotlin/androidx/datastore/core/MultiProcessCoordinator.android.kt b/datastore/datastore-core/src/androidMain/kotlin/androidx/datastore/core/MultiProcessCoordinator.android.kt index 3d3edffbf0a..e596d66eada 100644 --- a/datastore/datastore-core/src/androidMain/kotlin/androidx/datastore/core/MultiProcessCoordinator.android.kt +++ b/datastore/datastore-core/src/androidMain/kotlin/androidx/datastore/core/MultiProcessCoordinator.android.kt @@ -25,6 +25,7 @@ import java.io.IOException import java.nio.channels.FileLock import kotlin.contracts.ExperimentalContracts import kotlin.coroutines.CoroutineContext +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.sync.withLock @@ -43,7 +44,7 @@ internal class MultiProcessCoordinator( FileOutputStream(lockFile).use { lockFileStream -> var lock: FileLock? = null try { - lock = lockFileStream.getChannel().lock(0L, Long.MAX_VALUE, /* shared= */ false) + lock = getExclusiveFileLockWithRetryIfDeadlock(lockFileStream) return block() } finally { lock?.release() @@ -78,7 +79,8 @@ internal class MultiProcessCoordinator( // will throw an IOException with EAGAIN error, instead of returning null as // specified in {@link FileChannel#tryLock}. We only continue if the error // message is EAGAIN, otherwise just throw it. - if (ex.message?.startsWith(LOCK_ERROR_MESSAGE) != true) { + if ((ex.message?.startsWith(LOCK_ERROR_MESSAGE) != true) && + (ex.message?.startsWith(DEADLOCK_ERROR_MESSAGE) != true)) { throw ex } } @@ -162,6 +164,32 @@ internal class MultiProcessCoordinator( } } } + + companion object { + // Retry with exponential backoff to get file lock if it hits "Resource deadlock would + // occur" error until the backoff reaches [MAX_WAIT_MILLIS]. + private suspend fun getExclusiveFileLockWithRetryIfDeadlock( + lockFileStream: FileOutputStream + ): FileLock { + var backoff = INITIAL_WAIT_MILLIS + while (backoff <= MAX_WAIT_MILLIS) { + try { + return lockFileStream.getChannel().lock(0L, Long.MAX_VALUE, /* shared= */ false) + } catch (ex: IOException) { + if (ex.message?.contains(DEADLOCK_ERROR_MESSAGE) != true) { + throw ex + } + delay(backoff) + backoff *= 2 + } + } + return lockFileStream.getChannel().lock(0L, Long.MAX_VALUE, /* shared= */ false) + } + + private val DEADLOCK_ERROR_MESSAGE = "Resource deadlock would occur" + private val INITIAL_WAIT_MILLIS: Long = 10 + private val MAX_WAIT_MILLIS: Long = 60000 + } } /** -- cgit v1.2.3 From 7866e151eaa4ce7932fecef6f04adc2521104331 Mon Sep 17 00:00:00 2001 From: Zhiyuan Wang Date: Tue, 23 Apr 2024 19:19:31 +0000 Subject: Bump datastore release version to 1.1.1 Bug: b/333122045 (cherry picked from https://android-review.googlesource.com/q/commit:aedfa58c410fc12f44cb23532ff63bae8aa940f6) Merged-In: I6042c3a52e09c5177049890e10d59e91728a94a1 Change-Id: I6042c3a52e09c5177049890e10d59e91728a94a1 --- libraryversions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraryversions.toml b/libraryversions.toml index 7004c7ce8b4..94fcf988973 100644 --- a/libraryversions.toml +++ b/libraryversions.toml @@ -53,7 +53,7 @@ CREDENTIALS_FIDO_QUARANTINE = "1.0.0-alpha02" CURSORADAPTER = "1.1.0-alpha01" CUSTOMVIEW = "1.2.0-alpha03" CUSTOMVIEW_POOLINGCONTAINER = "1.1.0-alpha01" -DATASTORE = "1.1.0" +DATASTORE = "1.1.1" DOCUMENTFILE = "1.1.0-alpha02" DRAGANDDROP = "1.1.0-alpha01" DRAWERLAYOUT = "1.3.0-alpha01" -- cgit v1.2.3